First, let’s look at the TE breakpoint analysis

Dvir_breakpoints <- read.csv('/Users/jullienflynn/Documents/AndyLab/Proposal/ThesisWork/Chapter2/Comparative_vir-nov/DAIBAM/Dvir_breakpoint_TEs_forR.csv', header=T)
Dvir_breakpoints
##     Ref_species Inversion    Type base_pairs
## 1          Dvir    In2b_d  DAIBAM          0
## 2          Dvir    In2b_d     DNA          0
## 3          Dvir    In2b_d     LTR          0
## 4          Dvir    In2b_d    LINE          0
## 5          Dvir    In2b_d Unknown          0
## 6          Dvir    In2b_d   Other          0
## 7          Dvir    In2b_d      RC          0
## 8          Dvir    In2b_p  DAIBAM          0
## 9          Dvir    In2b_p     DNA          0
## 10         Dvir    In2b_p     LTR        649
## 11         Dvir    In2b_p    LINE          0
## 12         Dvir    In2b_p Unknown        323
## 13         Dvir    In2b_p   Other          0
## 14         Dvir    In2b_p      RC       3817
## 15         Dvir    In2c_d  DAIBAM          0
## 16         Dvir    In2c_d     DNA          0
## 17         Dvir    In2c_d     LTR        136
## 18         Dvir    In2c_d    LINE        129
## 19         Dvir    In2c_d Unknown        111
## 20         Dvir    In2c_d   Other          0
## 21         Dvir    In2c_d      RC        112
## 22         Dvir    In2c_p  DAIBAM          0
## 23         Dvir    In2c_p     DNA          0
## 24         Dvir    In2c_p     LTR          0
## 25         Dvir    In2c_p    LINE          0
## 26         Dvir    In2c_p Unknown         88
## 27         Dvir    In2c_p   Other          0
## 28         Dvir    In2c_p      RC          0
## 29         Dvir    In2a_d  DAIBAM        738
## 30         Dvir    In2a_d     DNA          0
## 31         Dvir    In2a_d     LTR          0
## 32         Dvir    In2a_d    LINE          0
## 33         Dvir    In2a_d Unknown        506
## 34         Dvir    In2a_d   Other          0
## 35         Dvir    In2a_d      RC          0
## 36         Dvir    In2a_p  DAIBAM       1249
## 37         Dvir    In2a_p     DNA        455
## 38         Dvir    In2a_p     LTR        297
## 39         Dvir    In2a_p    LINE         52
## 40         Dvir    In2a_p Unknown        691
## 41         Dvir    In2a_p   Other          0
## 42         Dvir    In2a_p      RC       1833
## 43         Dvir    In4a_d  DAIBAM          0
## 44         Dvir    In4a_d     DNA          0
## 45         Dvir    In4a_d     LTR          0
## 46         Dvir    In4a_d    LINE          0
## 47         Dvir    In4a_d Unknown          0
## 48         Dvir    In4a_d   Other          0
## 49         Dvir    In4a_d      RC          0
## 50         Dvir    In4a_p  DAIBAM          0
## 51         Dvir    In4a_p     DNA          0
## 52         Dvir    In4a_p     LTR          0
## 53         Dvir    In4a_p    LINE          0
## 54         Dvir    In4a_p Unknown          0
## 55         Dvir    In4a_p   Other          0
## 56         Dvir    In4a_p      RC          0
## 57         Dvir    In5b_d  DAIBAM          0
## 58         Dvir    In5b_d     DNA          0
## 59         Dvir    In5b_d     LTR         68
## 60         Dvir    In5b_d    LINE          0
## 61         Dvir    In5b_d Unknown         77
## 62         Dvir    In5b_d   Other          0
## 63         Dvir    In5b_d      RC          0
## 64         Dvir    In5b_p  DAIBAM          0
## 65         Dvir    In5b_p     DNA          0
## 66         Dvir    In5b_p     LTR          0
## 67         Dvir    In5b_p    LINE          0
## 68         Dvir    In5b_p Unknown          0
## 69         Dvir    In5b_p   Other          0
## 70         Dvir    In5b_p      RC        139
## 71         Dvir    InXc_d  DAIBAM          0
## 72         Dvir    InXc_d     DNA          0
## 73         Dvir    InXc_d     LTR          0
## 74         Dvir    InXc_d    LINE          0
## 75         Dvir    InXc_d Unknown          0
## 76         Dvir    InXc_d   Other          0
## 77         Dvir    InXc_d      RC          0
## 78         Dvir    InXc_p  DAIBAM          0
## 79         Dvir    InXc_p     DNA          0
## 80         Dvir    InXc_p     LTR        786
## 81         Dvir    InXc_p    LINE          0
## 82         Dvir    InXc_p Unknown        322
## 83         Dvir    InXc_p   Other          0
## 84         Dvir    InXc_p      RC        423
## 85         Dvir    InXb_d  DAIBAM          0
## 86         Dvir    InXb_d     DNA          0
## 87         Dvir    InXb_d     LTR          0
## 88         Dvir    InXb_d    LINE          0
## 89         Dvir    InXb_d Unknown          0
## 90         Dvir    InXb_d   Other          0
## 91         Dvir    InXb_d      RC          0
## 92         Dvir    InXb_p  DAIBAM          0
## 93         Dvir    InXb_p     DNA       1006
## 94         Dvir    InXb_p     LTR       2149
## 95         Dvir    InXb_p    LINE       2262
## 96         Dvir    InXb_p Unknown        644
## 97         Dvir    InXb_p   Other          0
## 98         Dvir    InXb_p      RC       8601
## 99         Dvir    InXa_d  DAIBAM        555
## 100        Dvir    InXa_d     DNA          0
## 101        Dvir    InXa_d     LTR          0
## 102        Dvir    InXa_d    LINE          0
## 103        Dvir    InXa_d Unknown          0
## 104        Dvir    InXa_d   Other          0
## 105        Dvir    InXa_d      RC          0
## 106        Dvir    InXa_p  DAIBAM          0
## 107        Dvir    InXa_p     DNA          0
## 108        Dvir    InXa_p     LTR          0
## 109        Dvir    InXa_p    LINE          0
## 110        Dvir    InXa_p Unknown          0
## 111        Dvir    InXa_p   Other          0
## 112        Dvir    InXa_p      RC          0
library(ggplot2)
## Warning in as.POSIXlt.POSIXct(Sys.time()): unknown timezone 'zone/tz/2020a.
## 1.0/zoneinfo/America/New_York'
ggplot() +
  geom_bar(data = Dvir_breakpoints, aes(y = base_pairs, x = Inversion, fill = factor(Type, levels = c("DAIBAM", "DNA", "LTR", "LINE", "Unknown", "Other", "RC"))), stat = "identity", position = 'fill') +
  scale_fill_manual(values = c("#FFCC00", "#33CC66", "#9966FF", "#FF33CC", "#00CCFF", "#CCCCCC", "#FF6600")) +
  scale_y_continuous(labels = scales::percent) +
  theme_bw() + 
  theme(legend.title=element_blank()) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
## Warning: Removed 42 rows containing missing values (geom_bar).

Dnov_breakpoints <- read.csv('/Users/jullienflynn/Documents/AndyLab/Proposal/ThesisWork/Chapter2/Comparative_vir-nov/DAIBAM/Dnov_breakpoints_TEs_forR.csv', header=T)

ggplot() +
  geom_bar(data = Dnov_breakpoints, aes(y = base_pairs, x = Inversion, fill = factor(Type, levels = c("DAIBAM", "DNA", "LTR", "LINE", "Unknown", "Other", "RC"))), stat = "identity", position = 'fill') +
  scale_fill_manual(values = c("#FFCC00", "#33CC66", "#9966FF", "#FF33CC", "#00CCFF", "#CCCCCC", "#FF6600")) +
  scale_y_continuous(labels = scales::percent) +
  theme_bw() + 
  theme(legend.title=element_blank()) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
## Warning: Removed 7 rows containing missing values (geom_bar).

Dame_breakpoints <- read.csv('/Users/jullienflynn/Documents/AndyLab/Proposal/ThesisWork/Chapter2/Comparative_vir-nov/DAIBAM/Dame_breakpoints_TEs_forR.csv', header=T)

ggplot() +
  geom_bar(data = Dame_breakpoints, aes(y = base_pairs, x = Inversion, fill = factor(Type, levels = c("DAIBAM", "DNA", "LTR", "LINE", "Unknown", "Other", "RC"))), stat = "identity", position = 'fill') +
  scale_fill_manual(values = c("#FFCC00", "#33CC66", "#9966FF", "#FF33CC", "#00CCFF", "#CCCCCC", "#FF6600")) +
  scale_y_continuous(labels = scales::percent) +
  theme_bw() + 
  theme(legend.title=element_blank()) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
## Warning: Removed 42 rows containing missing values (geom_bar).

Genome-wide TE annotation: pie charts

library("ggplot2")


Dvir_masking <- read.csv('~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/TE_landscapes/Dvir_masking.csv', header=T)
Dvir_masking
##             Type nt_masked  total_nt perc_mask
## 1           LINE  10245012 189680791  5.401186
## 2            DNA   2823009 189680791  1.488295
## 3            LTR  21901494 189680791 11.546501
## 4 Helitron/DINEs  12751857 189680791  6.722798
## 5          Other   3053549 189680791  1.609836
## 6        Unknown   5100259 189680791  2.688864
## 7         Non-TE 133805611 189680791 70.542521
Dvir_masking$Type <- factor(Dvir_masking$Type, levels=c("DNA", "Helitron/DINEs", "LTR", "LINE", "Other", "Unknown", "Non-TE"))

ggplot (Dvir_masking, aes(x = "", y = perc_mask, fill = Type)) +
  geom_bar(width=1, stat="identity") +
  coord_polar("y", start=0) +
  scale_fill_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC", "#FFCC00", "#00CCFF", "#CCCCCC")) +
  theme_void()

# virilis old genome.
Dvir_masking <- read.csv('~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/TE_landscapes/virilis_caf1.csv', header=T)
Dvir_masking
##      Type masked_nt  total_nt  perc_mask
## 1     LTR  17456713 189206400  9.2262804
## 2    LINE   9588644 189206400  5.0678222
## 3     DNA   1338553 189206400  0.7074565
## 4      RC   5670391 189206400  2.9969340
## 5 Unknown  21340230 189206400 11.2788098
## 6  Non-TE 133811869 189206400 70.7226970
Dvir_masking$Type <- factor(Dvir_masking$Type, levels=c("DNA", "RC", "LTR", "LINE", "Unknown", "Non-TE"))

ggplot (Dvir_masking, aes(x = "", y = perc_mask, fill = Type)) +
  geom_bar(width=1, stat="identity") +
  coord_polar("y", start=0) +
  scale_fill_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC", "#00CCFF", "#CCCCCC")) +
  theme_void()

# Dnovamexicana

Dnov_masking <- read.csv('~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/TE_landscapes/Dnov_masking.csv', header=T)
Dnov_masking
##             Type nt_masked  total_nt  perc_mask
## 1           LINE   6281676 182394971  3.4439963
## 2            DNA    894270 182394971  0.4902931
## 3            LTR  11169301 182394971  6.1236891
## 4 Helitron/DINEs  11289884 182394971  6.1898000
## 5          Other   7673241 182394971  4.2069367
## 6        Unknown   5411510 182394971  2.9669184
## 7         Non-TE 139675089 182394971 76.5783663
Dnov_masking$Type <- factor(Dnov_masking$Type, levels=c("DNA", "Helitron/DINEs", "LTR", "LINE", "Other", "Unknown", "Non-TE"))

ggplot (Dnov_masking, aes(x = "", y = perc_mask, fill = Type)) +
  geom_bar(width=1, stat="identity") +
  coord_polar("y", start=0) +
  scale_fill_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC", "#FFCC00", "#00CCFF", "#CCCCCC")) +
  theme_void()

# D americana

Dame_masking <- read.csv('~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/TE_landscapes/Dame_masking.csv', header=T)
Dame_masking
##             Type nt_masked  total_nt  perc_mask
## 1           LINE   8062670 211471196  3.8126564
## 2            DNA   1288628 211471196  0.6093634
## 3            LTR  10142700 211471196  4.7962560
## 4 Helitron/DINEs  28521833 211471196 13.4873371
## 5          Other   9166368 211471196  4.3345705
## 6        Unknown   4845328 211471196  2.2912473
## 7         Non-TE 149443669 211471196 70.6685694
Dame_masking$Type <- factor(Dame_masking$Type, levels=c("DNA", "Helitron/DINEs", "LTR", "LINE", "Other", "Unknown", "Non-TE"))

ggplot (Dame_masking, aes(x = "", y = perc_mask, fill = Type)) +
  geom_bar(width=1, stat="identity") +
  coord_polar("y", start=0) +
  scale_fill_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC", "#FFCC00", "#00CCFF", "#CCCCCC")) +
  theme_void()

Genome-wide and Y-specific TE annotation: landscape plots

Dvir_landscape <- read.delim('~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/Comparative_vir-nov/landscape/Feb7/Apr9_dvir_mod/Dvir_genome.fa.out.landscape.Div.Rclass.tab.sed.txt', header=F, sep="\t")
types <- Dvir_landscape$V1
types
## [1] Other   LTR     DNA     DINE    LINE    RC      Unknown
## Levels: DINE DNA LINE LTR Other RC Unknown
Dvir_landscape$V1 <- NULL
rownames(Dvir_landscape) <- types
RC_DINE <- Dvir_landscape["DINE",] + Dvir_landscape["RC",]
to_remove <- c("DINE", "RC")
Dvir_landscape_edit <- Dvir_landscape[!row.names(Dvir_landscape)%in%to_remove,]
Dvir_landscape_edit2 <- rbind(Dvir_landscape_edit, RC_DINE)
Dvir_landscape_edit2
##              V2      V3      V4      V5      V6      V7      V8      V9
## Other        32      88      40     293      47     280    1798    1366
## LTR     2759332 1397409 1317267 1275702 1089392 1326759 1156997 1024066
## DNA      198823   54226  116348  393031  417977  353076  180435  177560
## LINE     888723 1825663  786903  765242  544501  474068  555860  329650
## Unknown  212580   83833  107727  106931  138050  156283  127209  171456
## DINE     145202  118866  157942  330471  626207  817256  803570  827175
##             V10     V11     V12    V13    V14    V15    V16    V17    V18
## Other      5463   29386   75226 117274 159211 298042 461193 339794 382328
## LTR      976129  745479  751488 704309 675488 720830 639797 549795 442516
## DNA      105077   91533   65248  67084  58969  58590  55899  47702  43585
## LINE     232108  164667  185111 234833 265122 192473 168230 175475 184472
## Unknown  138048  146374  196972 199331 146371 155603 148140 180721 179929
## DINE    1061538 1172542 1030390 555089 345377 323574 340841 425320 356125
##            V19    V20    V21    V22    V23    V24    V25    V26    V27
## Other   232149 357786 246011 173314  52782  41802  35676  22189   8836
## LTR     405957 402708 401342 338518 277580 297462 257488 186664 215339
## DNA      48646  40907  35617  34734  32283  23356  22798  21464  19854
## LINE    189020 201204 189857 195557 200234 174941 180720 186384 166611
## Unknown 179076 214708 215945 267813 272711 309622 219828 224705 198708
## DINE    362969 348372 334102 371618 405743 351483 291181 238222 201068
##            V28    V29    V30    V31    V32    V33    V34   V35   V36   V37
## Other     3445   4166   1609   1448     89    263    107    16     0     0
## LTR     195932 168675 192037 189576 230217 157962 122727 82590 37683 24743
## DNA      12098   8392  17669   8791   4888   4002   1468   420   398     0
## LINE    140322 109860  79331  67316  56746  57216  27478 13627 11726  9881
## Unknown 122138 103013  64690  43788  28373  20899   8992  4974  3387   653
## DINE    148273 102247  70323  40556  24305  13928   6778  2149   741   266
##           V38  V39  V40 V41
## Other       0    0    0   0
## LTR     14244 3773 2469 680
## DNA        61    0    0   0
## LINE     2904 8443 2533   0
## Unknown   588   14   76   0
## DINE        0   14    0   0
# ok, got it.

Types <- c()
windows <- c()
Proportion <- c()

for (i in 1:nrow(Dvir_landscape_edit2)) {
  Types <- c(Types, rep(rownames(Dvir_landscape_edit2)[i], times=40)) 
  windows <- c(windows, c(1:40))
  Proportion <- c(Proportion, as.numeric(Dvir_landscape_edit2[i,1:ncol(Dvir_landscape_edit2)]))
}

Dvir_landscape_df <- data.frame(Type=Types, Window=windows, Prop=Proportion)
head(Dvir_landscape_df)
##    Type Window Prop
## 1 Other      1   32
## 2 Other      2   88
## 3 Other      3   40
## 4 Other      4  293
## 5 Other      5   47
## 6 Other      6  280
# still need to divide by the appropriate number
Dvir_genome_size <- 189680791
Dvir_landscape_df$Prop <- Dvir_landscape_df$Prop/Dvir_genome_size

# remove the SINE
Dvir_landscape_df_mod <- Dvir_landscape_df[which(Dvir_landscape_df$Type!="SINE"),]

Dvir_landscape_df_mod$Type <- factor(Dvir_landscape_df_mod$Type, levels=c("DNA", "DINE", "LTR", "LINE", "Other", "Unknown"))

# stacked barplot
ggplot(Dvir_landscape_df_mod, aes(fill=Type, y=Prop, x=Window)) +
  geom_bar(position="stack", stat="identity") +
  scale_fill_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC", "#FFCC00", "#00CCFF")) +
  ylim(0,0.0225) +
  theme_bw()

#### Dvir Y chrom  ####

Dvir_Y_landscape <- read.delim('~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/Comparative_vir-nov/landscape/Feb7/Dvir_Ycontigs.fa.out.landscape.Div.Rclass.tab.sed.txt', header=F, sep="\t")
types <- Dvir_Y_landscape$V1
types
## [1] LTR     RC      LINE    Unknown DINE    DNA    
## Levels: DINE DNA LINE LTR RC Unknown
Dvir_Y_landscape$V1 <- NULL
rownames(Dvir_Y_landscape) <- types
RC_DINE <- Dvir_Y_landscape["DINE",] + Dvir_Y_landscape["RC",]
to_remove <- c("DINE", "RC")
Dvir_Y_landscape_edit <- Dvir_Y_landscape[!row.names(Dvir_Y_landscape)%in%to_remove,]
Dvir_Y_landscape_edit2 <- rbind(Dvir_Y_landscape_edit, RC_DINE)

# ok, got it.

Types <- c()
windows <- c()
Proportion <- c()

for (i in 1:nrow(Dvir_Y_landscape_edit2)) {
  Types <- c(Types, rep(rownames(Dvir_Y_landscape_edit2)[i], times=40)) 
  windows <- c(windows, c(1:40))
  Proportion <- c(Proportion, as.numeric(Dvir_Y_landscape_edit2[i,1:ncol(Dvir_Y_landscape_edit2)]))
}

Dvir_Y_landscape_df <- data.frame(Type=Types, Window=windows, Prop=Proportion)
head(Dvir_Y_landscape_df)
##   Type Window   Prop
## 1  LTR      1 387055
## 2  LTR      2 452240
## 3  LTR      3 600157
## 4  LTR      4 603886
## 5  LTR      5 542391
## 6  LTR      6 549035
# still need to divide by the appropriate number
Dvir_Y_size <- 14824018
Dvir_Y_landscape_df$Prop <- Dvir_Y_landscape_df$Prop/Dvir_Y_size

# remove the SINE
Dvir_Y_landscape_df_mod <- Dvir_Y_landscape_df[which(Dvir_Y_landscape_df$Type!="SINE"),]

Dvir_Y_landscape_df_mod$Type <- factor(Dvir_Y_landscape_df_mod$Type, levels=c("DNA", "DINE", "LTR", "LINE", "Other", "Unknown"))

# stacked barplot
ggplot(Dvir_Y_landscape_df_mod, aes(fill=Type, y=Prop, x=Window)) +
  geom_bar(position="stack", stat="identity") +
  scale_fill_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC",  "#00CCFF")) +
  ylim(0,0.125) +
  theme_bw()

######
##Dnov

Dnov_landscape <- read.delim('~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/Comparative_vir-nov/landscape/Feb7/Dnov_genome.fa.out.landscape.Div.Rclass.tab.sed.txt', header=F, sep="\t")
types <- Dnov_landscape$V1
types
## [1] LINE    Other   DNA     Unknown LTR     RC      SINE    DINE   
## Levels: DINE DNA LINE LTR Other RC SINE Unknown
Dnov_landscape$V1 <- NULL
rownames(Dnov_landscape) <- types
RC_DINE <- Dnov_landscape["DINE",] + Dnov_landscape["RC",]
to_remove <- c("DINE", "RC")
Dnov_landscape_edit <- Dnov_landscape[!row.names(Dnov_landscape)%in%to_remove,]
Dnov_landscape_edit2 <- rbind(Dnov_landscape_edit, RC_DINE)
Dnov_landscape_edit2
##              V2     V3     V4     V5     V6     V7      V8      V9    V10
## LINE     666640 729920 437873 355817 312038 301572  290409  165507 133375
## Other     29001    991    468 153232  25881  53056  238413   70350 127523
## DNA       61443  25899  53278  57070  40388  38933   28909   31969  29020
## Unknown  576183  89122 112566 160143 135381 121178   94068   95364  82662
## LTR     1370306 739714 611444 570504 596322 458092  428656  418732 438645
## SINE          0      0      0      0      0    222     146       0    440
## DINE      22016 110777 130047 193779 447657 782248 1039331 1132728 885255
##            V11    V12    V13    V14    V15    V16     V17     V18    V19
## LINE    124750 143252 122111 143739 124972 134259  163202  155796 143143
## Other   207687 100252 429010 452917 597315 847750 1794867 1412014 526405
## DNA      39984  32846  27321  24952  22282  18774   29377   40824  48038
## Unknown  90219 104636  93560 126420 154453 140755  161782  166137 192379
## LTR     389911 424319 328714 228280 262493 253967  243258  213140 230401
## SINE        74   1120      0    399    136      0     360     150     75
## DINE    731183 582840 454148 365468 348149 364206  400064  471029 270405
##            V20    V21    V22    V23    V24    V25    V26    V27    V28
## LINE    145807 148281 143347 134430 122546 127465 108247  91694  84593
## Other   336611  94241  81006  45266  11753   9658  15678   4317   2839
## DNA      37417  34165  24274  22080  21838  22812  17225  14084  10571
## Unknown 210071 239042 266531 286149 282942 324164 262746 236653 180718
## LTR     239281 211439 254110 314319 187124 158578 149229 156721 151432
## SINE         0      0      0      0    114      0      0      0      0
## DINE    318400 284890 294826 282726 285688 288269 222889 189121 148098
##            V29    V30    V31    V32    V33    V34   V35   V36   V37   V38
## LINE     69338  65137  76447  79895  68733  59970 42895 20355 26152 10466
## Other     1463   1200    783    688    198    127     0   135   146     0
## DNA      10617   8748   5287   2489   3991   1175  1703  1848   304     0
## Unknown 150874 107046  72175  41392  24904  11675  9240  4321   932  1869
## LTR     150721 174365 206094 174096 127394 123091 98294 48947 23265  9356
## SINE         0      0      0      0      0      0     0     0     0     0
## DINE    103318  67281  33170  21661  10181   5969  1119   579   369     0
##          V39  V40 V41
## LINE    5211 2221  27
## Other      0    0   0
## DNA        0    0   0
## Unknown  652  406   0
## LTR     3335  771   0
## SINE       0    0   0
## DINE       0    0   0
# ok, got it.

Types <- c()
windows <- c()
Proportion <- c()

for (i in 1:nrow(Dnov_landscape_edit2)) {
  Types <- c(Types, rep(rownames(Dnov_landscape_edit2)[i], times=40)) 
  windows <- c(windows, c(1:40))
  Proportion <- c(Proportion, as.numeric(Dnov_landscape_edit2[i,1:ncol(Dnov_landscape_edit2)]))
}

Dnov_landscape_df <- data.frame(Type=Types, Window=windows, Prop=Proportion)

# still need to divide by the appropriate number
Dnov_genome_size <- 182394971
Dnov_landscape_df$Prop <- Dnov_landscape_df$Prop/Dnov_genome_size

# remove the SINE
Dnov_landscape_df_mod <- Dnov_landscape_df[which(Dnov_landscape_df$Type!="SINE"),]

Dnov_landscape_df_mod$Type <- factor(Dnov_landscape_df_mod$Type, levels=c("DNA", "DINE", "LTR", "LINE", "Other", "Unknown"))

# stacked barplot
ggplot(Dnov_landscape_df_mod, aes(fill=Type, y=Prop, x=Window)) +
  geom_bar(position="stack", stat="identity") +
  scale_fill_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC", "#FFCC00", "#00CCFF")) +
  ylim(0,0.0225) +
  theme_bw()

#### Dnov Y chrom ###

Dnov_Y_landscape <- read.delim('~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/Comparative_vir-nov/landscape/Feb7/Dnov_Ycontigs.fa.out.landscape.Div.Rclass.tab.sed.txt', header=F, sep="\t")
types <- Dnov_Y_landscape$V1

Dnov_Y_landscape$V1 <- NULL
rownames(Dnov_Y_landscape) <- types
RC_DINE <- Dnov_Y_landscape["DINE",] + Dnov_Y_landscape["RC",]
to_remove <- c("DINE", "RC")
Dnov_Y_landscape_edit <- Dnov_Y_landscape[!row.names(Dnov_Y_landscape)%in%to_remove,]
Dnov_Y_landscape_edit2 <- rbind(Dnov_Y_landscape_edit, RC_DINE)

# ok, got it.

Types <- c()
windows <- c()
Proportion <- c()

for (i in 1:nrow(Dnov_Y_landscape_edit2)) {
  Types <- c(Types, rep(rownames(Dnov_Y_landscape_edit2)[i], times=40)) 
  windows <- c(windows, c(1:40))
  Proportion <- c(Proportion, as.numeric(Dnov_Y_landscape_edit2[i,1:ncol(Dnov_Y_landscape_edit2)]))
}

Dnov_Y_landscape_df <- data.frame(Type=Types, Window=windows, Prop=Proportion)

# still need to divide by the appropriate number
Dnov_Y_size <- 8975053
Dnov_Y_landscape_df$Prop <- Dnov_Y_landscape_df$Prop/Dnov_Y_size

# remove the SINE
Dnov_Y_landscape_df_mod <- Dnov_Y_landscape_df[which(Dnov_Y_landscape_df$Type!="SINE"),]

Dnov_Y_landscape_df_mod$Type <- factor(Dnov_Y_landscape_df_mod$Type, levels=c("DNA", "DINE", "LTR", "LINE", "Other", "Unknown"))

# stacked barplot
ggplot(Dnov_Y_landscape_df_mod, aes(fill=Type, y=Prop, x=Window)) +
  geom_bar(position="stack", stat="identity") +
  scale_fill_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC", "#FFCC00", "#00CCFF")) +
  ylim(0,0.125) +
  theme_bw()

### Now Dame ####
Dame_landscape <- read.delim('~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/Comparative_vir-nov/landscape/Feb7/Dame_genome.fa.out.landscape.Div.Rclass.tab.sed.txt', header=F, sep="\t")
types <- Dame_landscape$V1

Dame_landscape$V1 <- NULL
rownames(Dame_landscape) <- types
RC_DINE <- Dame_landscape["DINE",] + Dame_landscape["RC",]
to_remove <- c("DINE", "RC")
Dame_landscape_edit <- Dame_landscape[!row.names(Dame_landscape)%in%to_remove,]
Dame_landscape_edit2 <- rbind(Dame_landscape_edit, RC_DINE)


# ok, got it.

Types <- c()
windows <- c()
Proportion <- c()

for (i in 1:nrow(Dame_landscape_edit2)) {
  Types <- c(Types, rep(rownames(Dame_landscape_edit2)[i], times=40)) 
  windows <- c(windows, c(1:40))
  Proportion <- c(Proportion, as.numeric(Dame_landscape_edit2[i,1:ncol(Dame_landscape_edit2)]))
}

Dame_landscape_df <- data.frame(Type=Types, Window=windows, Prop=Proportion)

# still need to divide by the appropriate number
Dame_genome_size <- 211471196
Dame_landscape_df$Prop <- Dame_landscape_df$Prop/Dame_genome_size

# remove the SINE
Dame_landscape_df_mod <- Dame_landscape_df[which(Dame_landscape_df$Type!="SINE"),]

Dame_landscape_df_mod$Type <- factor(Dame_landscape_df_mod$Type, levels=c("DNA", "DINE", "LTR", "LINE", "Other", "Unknown"))

# stacked barplot
ggplot(Dame_landscape_df_mod, aes(fill=Type, y=Prop, x=Window)) +
  geom_bar(position="stack", stat="identity") +
  scale_fill_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC", "#FFCC00", "#00CCFF")) +
  ylim(0,0.0225) +
  theme_bw()

#### Dame Y ######

Dame_Y_landscape <- read.delim('~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/Comparative_vir-nov/landscape/Feb7/Dame_Ycontigs.fa.out.landscape.Div.Rclass.tab.sed.txt', header=F, sep="\t")
types <- Dame_Y_landscape$V1

Dame_Y_landscape$V1 <- NULL
rownames(Dame_Y_landscape) <- types
RC_DINE <- Dame_Y_landscape["DINE",] + Dame_Y_landscape["RC",]
to_remove <- c("DINE", "RC")
Dame_Y_landscape_edit <- Dame_Y_landscape[!row.names(Dame_Y_landscape)%in%to_remove,]
Dame_Y_landscape_edit2 <- rbind(Dame_Y_landscape_edit, RC_DINE)

# ok, got it.

Types <- c()
windows <- c()
Proportion <- c()

for (i in 1:nrow(Dame_Y_landscape_edit2)) {
  Types <- c(Types, rep(rownames(Dame_Y_landscape_edit2)[i], times=40)) 
  windows <- c(windows, c(1:40))
  Proportion <- c(Proportion, as.numeric(Dame_Y_landscape_edit2[i,1:ncol(Dame_Y_landscape_edit2)]))
}

Dame_Y_landscape_df <- data.frame(Type=Types, Window=windows, Prop=Proportion)

# still need to divide by the appropriate number
Dame_Y_size <- 24076955
Dame_Y_landscape_df$Prop <- Dame_Y_landscape_df$Prop/Dame_Y_size

# remove the SINE
Dame_Y_landscape_df_mod <- Dame_Y_landscape_df[which(Dame_Y_landscape_df$Type!="SINE"),]

Dame_Y_landscape_df_mod$Type <- factor(Dame_Y_landscape_df_mod$Type, levels=c("DNA", "DINE", "LTR", "LINE", "Other", "Unknown"))

# stacked barplot
ggplot(Dame_Y_landscape_df_mod, aes(fill=Type, y=Prop, x=Window)) +
  geom_bar(position="stack", stat="identity") +
  scale_fill_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC", "#FFCC00", "#00CCFF")) +
  ylim(0,0.125) +
  theme_bw()

Now, look specifically at LINE differences

Dvir_LINEs <- read.delim('~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/Comparative_vir-nov/landscape/LINEs/Dvir_landscape.LINE.Rfam.txt', header=F, sep="\t")

Dvir_LINEs$V1 <- NULL

Types <- c()
windows <- c()
Proportion <- c()

for (i in 1:nrow(Dvir_LINEs)) {
  Types <- c(Types, rep(as.character(Dvir_LINEs[i,1]), times=40)) 
  windows <- c(windows, c(1:40))
  Proportion <- c(Proportion, as.numeric(Dvir_LINEs[i,2:ncol(Dvir_LINEs)]))
}

Dvir_LINEs_df <- data.frame(Type=Types, Window=windows, Prop=Proportion)
head(Dvir_LINEs_df)
##       Type Window   Prop
## 1 I-Jockey      1 103793
## 2 I-Jockey      2 456773
## 3 I-Jockey      3 148221
## 4 I-Jockey      4 400683
## 5 I-Jockey      5 249661
## 6 I-Jockey      6 141288
# still need to divide by the appropriate number
Dvir_genome_size <- 189680791
Dvir_LINEs_df$Prop <- Dvir_LINEs_df$Prop/Dvir_genome_size

# remove the SINE
Dvir_LINEs_df_mod <- Dvir_LINEs_df[which(Dvir_LINEs_df$Type!="LINE" & Dvir_LINEs_df$Type!="L2" & Dvir_LINEs_df$Type!="R1-LOA" & Dvir_LINEs_df$Type!="I"),]

Dvir_LINEs_df_mod$Type <- factor(Dvir_LINEs_df_mod$Type, levels=c("CR1", "I-Jockey", "Penelope", "R1"))

# stacked barplot
ggplot(Dvir_LINEs_df_mod, aes(fill=Type, y=Prop, x=Window)) +
  geom_bar(position="stack", stat="identity") +
  scale_fill_manual(values = c("#33CC66", "#FF6600", "#FFCC00", "#9966FF")) +
  #ylim(0,0.037) +
  theme_bw()

## now Dnov

Dnov_LINEs <- read.delim('~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/Comparative_vir-nov/landscape/LINEs/Dnov_landscape.LINE.Rfam.txt', header=F, sep="\t")

Dnov_LINEs$V1 <- NULL

Types <- c()
windows <- c()
Proportion <- c()

for (i in 1:nrow(Dnov_LINEs)) {
  Types <- c(Types, rep(as.character(Dnov_LINEs[i,1]), times=40)) 
  windows <- c(windows, c(1:40))
  Proportion <- c(Proportion, as.numeric(Dnov_LINEs[i,2:ncol(Dnov_LINEs)]))
}

Dnov_LINEs_df <- data.frame(Type=Types, Window=windows, Prop=Proportion)
head(Dnov_LINEs_df)
##       Type Window   Prop
## 1 I-Jockey      1 460641
## 2 I-Jockey      2 252512
## 3 I-Jockey      3  64731
## 4 I-Jockey      4  20796
## 5 I-Jockey      5  28060
## 6 I-Jockey      6  39746
# still need to divide by the appropriate number
Dnov_genome_size <- 182394971
Dnov_LINEs_df$Prop <- Dnov_LINEs_df$Prop/Dnov_genome_size


# remove the SINE
Dnov_LINEs_df_mod <- Dnov_LINEs_df[which(Dnov_LINEs_df$Type!="LINE" & Dnov_LINEs_df$Type!="L2" & Dnov_LINEs_df$Type!="R1-LOA" & Dnov_LINEs_df$Type!="I"),]

Dnov_LINEs_df_mod$Type <- factor(Dnov_LINEs_df_mod$Type, levels=c("CR1", "I-Jockey", "Penelope", "R1"))

# stacked barplot
ggplot(Dnov_LINEs_df_mod, aes(fill=Type, y=Prop, x=Window)) +
  geom_bar(position="stack", stat="identity") +
  scale_fill_manual(values = c("#33CC66", "#FF6600", "#FFCC00", "#9966FF")) +
  ylim(0,0.01) +
  theme_bw()

Now, Y chromosome only annotation: pie charts

Dame_Y <- read.csv('~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/Comparative_vir-nov/Ychrom/Dame_Y_masking.csv', header=T)
Dame_Y
##             Type nt_masked total_nt    perc_mask
## 1           LINE   1906166 24076955  7.916972890
## 2            DNA     40256 24076955  0.167197222
## 3            LTR   3899143 24076955 16.194502170
## 4 Helitron/DINEs  16691516 24076955 69.325693390
## 5          Other       694 24076955  0.002882424
## 6        Unknown    126786 24076955  0.526586522
## 7         Non-TE   1412394 24076955  5.866165385
Dame_Y$Type <- factor(Dame_Y$Type, levels=c("DNA", "Helitron/DINEs", "LTR", "LINE", "Other", "Unknown", "Non-TE"))

ggplot (Dame_Y, aes(x = "", y = perc_mask, fill = Type)) +
  geom_bar(width=1, stat="identity") +
  coord_polar("y", start=0) +
  scale_fill_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC", "#FFCC00", "#00CCFF", "#CCCCCC")) +
  theme_void()

### everything but Y
Dame_nonY <- read.csv('~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/Comparative_vir-nov/Ychrom/Dame_allbutY_masking.csv', header=T)
Dame_nonY
##             Type nt_masked  total_nt  perc_mask
## 1           LINE   6153409 187394241  3.2836703
## 2            DNA   1249870 187394241  0.6669735
## 3            LTR   6211930 187394241  3.3148991
## 4 Helitron/DINEs  11828129 187394241  6.3118957
## 5          Other   9154695 187394241  4.8852595
## 6        Unknown   4683844 187394241  2.4994599
## 7         Non-TE 148112364 187394241 79.0378419
Dame_nonY$Type <- factor(Dame_nonY$Type, levels=c("DNA", "Helitron/DINEs", "LTR", "LINE", "Other", "Unknown", "Non-TE"))

ggplot (Dame_nonY, aes(x = "", y = perc_mask, fill = Type)) +
  geom_bar(width=1, stat="identity") +
  coord_polar("y", start=0) +
  scale_fill_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC", "#FFCC00", "#00CCFF", "#CCCCCC")) +
  theme_void()

Dnov_Y <- read.csv('~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/Comparative_vir-nov/Ychrom/Dnov_Y_masking.csv', header=T)
Dnov_Y
##             Type nt_masked total_nt  perc_mask
## 1           LINE   1721083  8975053 19.1762990
## 2            DNA     19007  8975053  0.2117759
## 3            LTR   3680934  8975053 41.0129500
## 4 Helitron/DINEs   2203574  8975053 24.5522116
## 5          Other     34497  8975053  0.3843654
## 6        Unknown    244302  8975053  2.7220118
## 7         Non-TE   1071656  8975053 11.9403863
Dnov_Y$Type <- factor(Dame_Y$Type, levels=c("DNA", "Helitron/DINEs", "LTR", "LINE", "Other", "Unknown", "Non-TE"))

ggplot (Dnov_Y, aes(x = "", y = perc_mask, fill = Type)) +
  geom_bar(width=1, stat="identity") +
  coord_polar("y", start=0) +
  scale_fill_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC", "#FFCC00", "#00CCFF", "#CCCCCC")) +
  theme_void()

### everything but Y

Dnov_nonY <- read.csv('~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/Comparative_vir-nov/Ychrom/Dnov_allbutY_masking.csv', header=T)

Dnov_nonY$Type <- factor(Dnov_nonY$Type, levels=c("DNA", "Helitron/DINEs", "LTR", "LINE", "Other", "Unknown", "Non-TE"))

ggplot (Dnov_nonY, aes(x = "", y = perc_mask, fill = Type)) +
  geom_bar(width=1, stat="identity") +
  coord_polar("y", start=0) +
  scale_fill_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC", "#FFCC00", "#00CCFF", "#CCCCCC")) +
  theme_void()

Dvir_Y <- read.csv('~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/Comparative_vir-nov/Ychrom/Dvir_Y_masking.csv', header=T)
Dvir_Y
##             Type nt_masked total_nt perc_mask
## 1           LINE   1895813 14824018 12.788793
## 2            DNA    435272 14824018  2.936262
## 3            LTR   6596624 14824018 44.499568
## 4 Helitron/DINEs   4344957 14824018 29.310252
## 5          Other         0 14824018  0.000000
## 6        Unknown    323572 14824018  2.182755
## 7         Non-TE   1227780 14824018  8.282370
Dvir_Y$Type <- factor(Dvir_Y$Type, levels=c("DNA", "Helitron/DINEs", "LTR", "LINE", "Other", "Unknown", "Non-TE"))

ggplot (Dvir_Y, aes(x = "", y = perc_mask, fill = Type)) +
  geom_bar(width=1, stat="identity") +
  coord_polar("y", start=0) +
  scale_fill_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC", "#FFCC00", "#00CCFF", "#CCCCCC")) +
  theme_void()

### everything but Y

Dvir_nonY <- read.csv('~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/Comparative_vir-nov/Ychrom/Dvir_allbutY_masking.csv', header=T)
Dvir_nonY
##             Type nt_masked  total_nt perc_mask
## 1           LINE   8349199 174856773  4.774879
## 2            DNA   2387737 174856773  1.365539
## 3            LTR  15162711 174856773  8.671503
## 4 Helitron/DINEs   8406865 174856773  4.807858
## 5          Other   3053549 174856773  1.746314
## 6        Unknown   4776687 174856773  2.731771
## 7         Non-TE 132720025 174856773 75.902136
Dvir_nonY$Type <- factor(Dvir_nonY$Type, levels=c("DNA", "Helitron/DINEs", "LTR", "LINE", "Other", "Unknown", "Non-TE"))

ggplot (Dvir_nonY, aes(x = "", y = perc_mask, fill = Type)) +
  geom_bar(width=1, stat="identity") +
  coord_polar("y", start=0) +
  scale_fill_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC", "#FFCC00", "#00CCFF", "#CCCCCC")) +
  theme_void()

Now, chromosome-level TE density in 100 kb windows

Dvir_interval_sizes <- read.delim(file='~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/piRNA_sequencing/Spring2020/Muller/Dvir_100kb.intervals.sizes', header=F, sep="\t")

Dvir_TEdens <- read.delim(file='~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/piRNA_sequencing/Spring2020/Muller/Dvir_100kb.intervals_TE.density', header=F, sep="\t", as.is = T)

Dvir_interval_sizes <- read.delim(file='~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/piRNA_sequencing/Spring2020/Muller/Dvir_intervals.sizes', header=F, sep="\t")

Dvir_TEdens <- read.delim(file='~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/piRNA_sequencing/Spring2020/Muller/Dvir_intervals_TE.density', header=F, sep="\t", as.is = T)

Dvir_interval_sizes <- read.delim(file='~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/piRNA_sequencing/Spring2020/Muller/Dame_100kb.intervals.sizes.redo', header=F, sep="\t")

Dvir_TEdens <- read.delim(file='~/Documents/AndyLab/Proposal/ThesisWork/Chapter2/piRNA_sequencing/Spring2020/Muller/Dame_100kb.intervals_TE.density.redo', header=F, sep="\t", as.is = T)


sizes <- as.numeric(Dvir_interval_sizes$V2)
cluster_labels <- Dvir_TEdens$V1
Dvir_TEdens$V1 <- NULL
rownames(Dvir_TEdens) <- cluster_labels
colnames(Dvir_TEdens) <- c("LTR", "RC", "LINE", "DNA", "Unknown")
head(Dvir_TEdens)
##                      LTR    RC LINE DNA Unknown   NA
## Chr_2-0-100000         0 39807    0   0   22737  222
## Chr_2-100000-200000  211    70 3463 183   79523  175
## Chr_2-200000-300000 1999   153 9359  70   10187 1556
## Chr_2-300000-400000  320   179 5572 102    1239  187
## Chr_2-400000-500000  187    70  895  49   84216  996
## Chr_2-500000-600000  260   963  611   0   26485  574
Dvir_TEdens_mx <- data.matrix(Dvir_TEdens)

Dvir_TEdens_norm <- Dvir_TEdens_mx/sizes
nrow(Dvir_TEdens_norm)
## [1] 1594
length(sizes) # need to rerun to get the correct size
## [1] 1594
head(Dvir_TEdens_norm)
##                         LTR      RC    LINE     DNA Unknown    <NA>
## Chr_2-0-100000      0.00000 0.39807 0.00000 0.00000 0.22737 0.00222
## Chr_2-100000-200000 0.00211 0.00070 0.03463 0.00183 0.79523 0.00175
## Chr_2-200000-300000 0.01999 0.00153 0.09359 0.00070 0.10187 0.01556
## Chr_2-300000-400000 0.00320 0.00179 0.05572 0.00102 0.01239 0.00187
## Chr_2-400000-500000 0.00187 0.00070 0.00895 0.00049 0.84216 0.00996
## Chr_2-500000-600000 0.00260 0.00963 0.00611 0.00000 0.26485 0.00574
# now need to put into a different format for plotting

Types <- c()
clusters <- c()
Proportion <- c()
IDs <- c()



for (i in 1:nrow(Dvir_TEdens_norm)) {
  Types <- c(Types, "LTR", "RC", "LINE", "DNA", "Other", "Unknown")
  clusters <- c(clusters, rep(rownames(Dvir_TEdens_norm)[i], times=6))
  Proportion <- c(Proportion, as.numeric(Dvir_TEdens_norm[i,1:ncol(Dvir_TEdens_norm)]))
  IDs <- c(IDs, rep (i, times=6))
}


#Dvir_TEdens_df <- data.frame(Clusters=clusters, Type=Types, Prop=Proportion, Chr=chrom)
Dvir_TEdens_df <- data.frame(Clusters=clusters, Type=Types, Prop=Proportion, ID=IDs)
head(Dvir_TEdens_df)
##         Clusters    Type    Prop ID
## 1 Chr_2-0-100000     LTR 0.00000  1
## 2 Chr_2-0-100000      RC 0.39807  1
## 3 Chr_2-0-100000    LINE 0.00000  1
## 4 Chr_2-0-100000     DNA 0.00000  1
## 5 Chr_2-0-100000   Other 0.22737  1
## 6 Chr_2-0-100000 Unknown 0.00222  1
temp1 <- strsplit(as.character(Dvir_TEdens_df$Clusters), "-")
Dvir_TEdens_df$Chr <- sapply(temp1, function(x) x[1])

Dvir_TEdens_df$Type <- factor(Dvir_TEdens_df$Type, levels=c("DNA", "RC", "LTR", "LINE", "Other", "Unknown"))

# make a plot for each Chr like piRNA clusters.
Dvir_TEdens_df_Chr2 <- subset(Dvir_TEdens_df, Chr == "Chr_2")
head(Dvir_TEdens_df_Chr2)
##         Clusters    Type    Prop ID   Chr
## 1 Chr_2-0-100000     LTR 0.00000  1 Chr_2
## 2 Chr_2-0-100000      RC 0.39807  1 Chr_2
## 3 Chr_2-0-100000    LINE 0.00000  1 Chr_2
## 4 Chr_2-0-100000     DNA 0.00000  1 Chr_2
## 5 Chr_2-0-100000   Other 0.22737  1 Chr_2
## 6 Chr_2-0-100000 Unknown 0.00222  1 Chr_2
# keep working on this.

library(ggplot2)

# x should be ID, not cluster. the clusters will be sorted wrong by R.

ggplot(Dvir_TEdens_df_Chr2, aes(x = ID, y = Prop)) +
  geom_line(aes(color = Type), alpha = 0.7, size = 0.3) +
  scale_color_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC","#FFCC00", "#00CCFF")) +
  ylim(0,1) +
  theme_bw() +
  theme(axis.text.x=element_blank())

Dvir_TEdens_df_Chr3 <- subset(Dvir_TEdens_df, Chr == "Chr_3") 

tail(Dvir_TEdens_df_Chr3)
##                     Clusters    Type        Prop  ID   Chr
## 4021 Chr_3-27800000-27942934     LTR 0.140792254 671 Chr_3
## 4022 Chr_3-27800000-27942934      RC 0.065023018 671 Chr_3
## 4023 Chr_3-27800000-27942934    LINE 0.593819525 671 Chr_3
## 4024 Chr_3-27800000-27942934     DNA 0.003791960 671 Chr_3
## 4025 Chr_3-27800000-27942934   Other 0.001567157 671 Chr_3
## 4026 Chr_3-27800000-27942934 Unknown 0.026886535 671 Chr_3
ggplot(Dvir_TEdens_df_Chr3, aes(x = ID, y = Prop)) +
  geom_line(aes(color = Type), alpha = 0.7, size = 0.3) +
  scale_color_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC","#FFCC00", "#00CCFF")) +
  ylim(0,0.8) +
  theme_bw() +
  theme(axis.text.x=element_blank())

Dvir_TEdens_df_Chr4 <- subset(Dvir_TEdens_df, Chr == "Chr_4" ) 

ggplot(Dvir_TEdens_df_Chr4, aes(x = ID, y = Prop)) +
  geom_line(aes(color = Type), alpha = 0.7, size = 0.3) +
  scale_color_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC","#FFCC00", "#00CCFF")) +
  ylim(0,0.6) +
  theme_bw() +
  theme(axis.text.x=element_blank())

Dvir_TEdens_df_Chr5 <- subset(Dvir_TEdens_df, Chr == "Chr_5") 

ggplot(Dvir_TEdens_df_Chr5, aes(x = ID, y = Prop)) +
  geom_line(aes(color = Type), alpha = 0.7, size = 0.3) +
  scale_color_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC","#FFCC00", "#00CCFF")) +
  ylim(0,1) +
  theme_bw() +
  theme(axis.text.x=element_blank())

Dvir_TEdens_df_Chr6 <- subset(Dvir_TEdens_df, Chr == "Chr_6") 

ggplot(Dvir_TEdens_df_Chr6, aes(x = ID, y = Prop)) +
  geom_line(aes(color = Type), alpha = 0.7, size = 0.3) +
  scale_color_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC","#FFCC00", "#00CCFF")) +
  theme_bw() +
  theme(axis.text.x=element_blank())

Dvir_TEdens_df_ChrX <- subset(Dvir_TEdens_df, Chr == "Chr_X") 

ggplot(Dvir_TEdens_df_ChrX, aes(x = ID, y = Prop)) +
  geom_line(aes(color = Type), alpha = 0.7, size = 0.3) +
  scale_color_manual(values = c("#33CC66", "#FF6600", "#9966FF", "#FF33CC","#FFCC00", "#00CCFF")) +
  ylim(0,1) +
  theme_bw() +
  theme(axis.text.x=element_blank())
## Warning: Removed 2 row(s) containing missing values (geom_path).